In this script, we perform the 2-D t-SNE of the normalized posterior mean estimates of the most active GTEx cis-eQTLs.
# Gene annotation table. The conversion goes through data.frame() (not
# as.data.frame()), so column names are made syntactic by its default
# check.names = TRUE; dotted column names are relied on further down.
gene_info_eqtl <- data.frame(fread("../data/gene_info_eqtl.txt"))

# Number of annotated gene IDs (rows of the annotation table); the line below
# is the output recorded by the original author.
nrow(gene_info_eqtl)
## [1] 65774

# Row position of the first occurrence of every distinct gene ID.
# The original author notes that all IDs are unique for this data set, but
# flagged that this still needs to be checked.
index_unique_IDs <- which(!duplicated(gene_info_eqtl[, 1]))

# Gene-SNP pair names, flattened into a plain vector.
genesnp.names <- as.vector(
  as.matrix(read.table("../data/gene_snp_names.txt"))
)

# The gene ID is taken to be the first 15 characters of each gene-SNP name
# (presumably the length of an Ensembl gene ID -- TODO confirm).
gene_names_GTEX <- substr(genesnp.names, 1, 15)

# For each eQTL gene, the row of gene_info_eqtl holding its annotation
# (NA when the gene is absent from the annotation table).
matching_labels_eqtls_to_gene_info <- match(
  gene_names_GTEX,
  gene_info_eqtl[, 1]
)

# Positions of the eQTL genes that have no annotation.
index_not_found <- which(is.na(matching_labels_eqtls_to_gene_info))
Load the t-SNE coordinates computed on the 16407 most active cis-eQTLs and filter the data so that only the cis genes for which we have BioMart info are retained.
# Restrict the t-SNE coordinates and the normalized posterior means to the
# genes that have annotation, label them, and plot the most active ones.
#
# Relies on matching_labels_eqtls_to_gene_info and gene_info_eqtl computed
# earlier in the script.

# TRUE for every eQTL gene that was found in the annotation table. A logical
# mask is used instead of negative indexing: x[-integer(0), ] selects *no*
# rows, so the negative form would silently empty the data whenever every
# gene is matched.
has_gene_info <- !is.na(matching_labels_eqtls_to_gene_info)

# t-SNE coordinates; rows are assumed to be in the same order as the gene-SNP
# names used to build the matching (TODO confirm). The length check keeps the
# logical mask from being recycled or padded with NA on misaligned input.
tsne_data <- read.table("../data/tsne_postmean_eqtlbma.txt")
stopifnot(nrow(tsne_data) == length(has_gene_info))
temp <- tsne_data[has_gene_info, ]

# Annotation rows for the retained genes, and a human-readable label built
# from gene ID, gene name, description and chromosome.
matching_labels <- matching_labels_eqtls_to_gene_info[has_gene_info]
flag <- paste0(
  gene_info_eqtl[matching_labels, 1], ", ",
  "name-", gene_info_eqtl$Associated.Gene.Name[matching_labels], ", ",
  "descr-", gene_info_eqtl$Description[matching_labels], ",",
  "chr-", gene_info_eqtl$Chromosome.Name[matching_labels]
)
rownames(temp) <- flag

# Normalized posterior means, filtered and labelled the same way.
post.mean.shrink <- read.table("../data/post.mean.et_withbma.normalized.txt")
#post.mean.shrink=data_max_tscore[complete.cases(data_max_tscore),-c(1,2,47)];
stopifnot(nrow(post.mean.shrink) == length(has_gene_info))
post.mean.shrink.matched <- post.mean.shrink[has_gene_info, ]
rownames(post.mean.shrink.matched) <- rownames(temp)

# Number of rows shown in the curve plot, and number of top-ranked genes shown
# in the scatter plots.
n_index <- 2000
n_top_active <- 10000

# Rank genes by their largest value across columns and keep the top
# n_top_active. head() truncates to the rows that actually exist instead of
# padding the index with NA when there are fewer rows than requested.
row_max <- apply(as.matrix(post.mean.shrink.matched), 1, max)
tissue_specific_index <- head(
  order(row_max, decreasing = TRUE),
  n_top_active
)

# Static scatter plot of the two t-SNE coordinates for the selected genes.
plot(
  temp[tissue_specific_index, 1],
  temp[tissue_specific_index, 2],
  xlab = "t-SNE projection 1",
  ylab = "t-SNE projection 2"
)

# Interactive version of the same scatter plot: a single group, with the
# labels built above as point IDs. The group vector is sized from the index so
# the two can never disagree.
suppressMessages(suppressWarnings(iplot(
  temp[tissue_specific_index, 1],
  temp[tissue_specific_index, 2],
  rep(1, length(tissue_specific_index)),
  rownames(temp)[tissue_specific_index]
)))

# Interactive curves linked to the t-SNE scatter plot.
# NOTE(review): as in the original, this uses the first n_index rows in file
# order, not the top-ranked rows in tissue_specific_index -- confirm intended.
# The x positions are derived from the number of columns rather than being
# hard-coded.
curve_rows <- seq_len(min(n_index, nrow(post.mean.shrink.matched)))
suppressMessages(suppressWarnings(iplotCurves(
  post.mean.shrink.matched[curve_rows, ],
  seq_len(ncol(post.mean.shrink.matched)),
  temp[curve_rows, ]
)))